EIA API - Data Backfill

Load libraries

import eia_api as api
import eia_etl as etl
import eia_data 
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as px

API Settings:

raw_json = open("../metadata/series.json")
meta_json = json.load(raw_json)
series = pd.DataFrame(meta_json["series"])
api_path = meta_json["api_path"]

facets_template = {
  "parent" : None,
  "subba" : None
}

start = datetime.datetime(2018, 6, 20, 1)
end = datetime.datetime(2024, 2, 18, 1)

offset = 2250

eia_api_key = os.getenv('EIA_API_KEY')

meta_path = "../metadata/ciso_log_py.csv"
data_path = "../csv/ciso_grid_py.csv"
api_metadata = api.eia_metadata(api_key = eia_api_key, api_path = api_path)


print(api_metadata.meta["endPeriod"])
end = pd.to_datetime(api_metadata.meta["endPeriod"])
print(end)
2024-02-22T08
2024-02-22 08:00:00
meta_obj = eia_data.get_metadata(api_key = eia_api_key, api_path = api_path, meta_path = meta_path, series = series)
m = meta_obj.request_meta
index = meta_obj.last_index + 1
data = None
for i in m.index:

    facets = facets_template
    facets["parent"] = m.at[i, "parent"]
    facets["subba"] = m.at[i, "subba"]
    start = m.at[i, "request_start"]
    end = m.at[i, "end"]

    print(facets)
    if m.at[i, "updates_available"]:
        temp = api.eia_backfile(api_key = eia_api_key, 
            api_path = api_path+ "data", 
            facets = facets, 
            start = start.to_pydatetime(),
            end = end.to_pydatetime(),
            offset = offset) 

        ts_obj = pd.DataFrame(np.arange(start = start, stop = end + datetime.timedelta(hours = 1), step = datetime.timedelta(hours = 1)).astype(datetime.datetime), columns=["index"])
        ts_obj  = ts_obj.merge(temp.data, left_on = "index", right_on = "period", how="left")
        ts_obj.drop("period", axis = 1, inplace= True)
        ts_obj = ts_obj.rename(columns= {"index": "period"})
    else:
        ts_obj = None
        print("No new data is available")

    meta_temp = eia_data.create_metadata(data = ts_obj, start = start, end = end, type = "refresh")

    if ts_obj is None:
        meta_temp["parent"] =  m.at[i, "parent"]
        meta_temp["subba"] =  m.at[i, "subba"]


    if meta_temp["success"]:
        d = eia_data.append_data(data_path = data_path, new_data = ts_obj, save = True)
        meta_temp["update"] = True
    else:
        meta_temp["update"] = False
        meta_temp["comments"] = meta_temp["comments"] + "The data refresh failed, please check the log; "

    meta_df = pd.DataFrame([meta_temp])
    
    if data is None:
        data = ts_obj
    else:
        data = data._append(ts_obj)

    if i == series.index.start:
        meta_new = meta_df
    else:
        meta_new = meta_new._append(meta_df)
{'parent': 'CISO', 'subba': 'PGAE'}
Save the data to CSV file
{'parent': 'CISO', 'subba': 'SCE'}
Save the data to CSV file
{'parent': 'CISO', 'subba': 'SDGE'}
Save the data to CSV file
{'parent': 'CISO', 'subba': 'VEA'}
Save the data to CSV file
print(meta_new)

meta_updated = eia_data.append_metadata(meta_path = meta_path, meta = meta_new, save = True, init = False)

print(meta_updated)
  index parent subba                             time               start  \
0  None   CISO  PGAE 2024-02-23 03:34:54.866756+00:00 2024-02-18 02:00:00   
0  None   CISO   SCE 2024-02-23 03:34:56.421798+00:00 2024-02-18 02:00:00   
0  None   CISO  SDGE 2024-02-23 03:34:57.735037+00:00 2024-02-18 02:00:00   
0  None   CISO   VEA 2024-02-23 03:34:59.082815+00:00 2024-02-18 02:00:00   

                  end           start_act             end_act  start_match  \
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00         True   
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00         True   
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00         True   
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00         True   

   end_match  n_obs  na     type  update  success comments  
0       True    103   0  refresh    True     True           
0       True    103   0  refresh    True     True           
0       True    103   0  refresh    True     True           
0       True    103   0  refresh    True     True           
   index parent subba                             time               start  \
0      1   CISO  PGAE 2024-02-23 03:32:21.312337+00:00 2018-07-01 08:00:00   
1      1   CISO   SCE 2024-02-23 03:32:36.118749+00:00 2018-07-01 08:00:00   
2      1   CISO  SDGE 2024-02-23 03:32:51.011954+00:00 2018-07-01 08:00:00   
3      1   CISO   VEA 2024-02-23 03:33:06.089176+00:00 2018-07-01 08:00:00   
0      2   CISO  PGAE 2024-02-23 03:34:54.866756+00:00 2024-02-18 02:00:00   
0      2   CISO   SCE 2024-02-23 03:34:56.421798+00:00 2024-02-18 02:00:00   
0      2   CISO  SDGE 2024-02-23 03:34:57.735037+00:00 2024-02-18 02:00:00   
0      2   CISO   VEA 2024-02-23 03:34:59.082815+00:00 2024-02-18 02:00:00   

                  end           start_act             end_act  start_match  \
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
1 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
2 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
3 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00         True   
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00         True   
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00         True   
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00         True   

   end_match  n_obs  na      type  update  success  \
0       True  49386  98  backfile    True     True   
1       True  49386  98  backfile    True     True   
2       True  49386  98  backfile    True     True   
3       True  49386  98  backfile    True     True   
0       True    103   0   refresh    True     True   
0       True    103   0   refresh    True     True   
0       True    103   0   refresh    True     True   
0       True    103   0   refresh    True     True   

                      comments  
0  Missing values were found;   
1  Missing values were found;   
2  Missing values were found;   
3  Missing values were found;   
0                               
0                               
0                               
0                               

Plot the Series

We will use Plotly to visualize the series:

if data is not None:
    d = data.sort_values(by = ["subba", "period"])
    p = px.line(data, x="period", y="value", color="subba")
    p.show()
else: 
    print("No new data is available")
full_data = pd.read_csv(data_path)
full_data.head()

full_data["period"] = pd.to_datetime(full_data["period"])
p = px.line(full_data, x="period", y="value", color="subba")
p.show()